/****************

ITA_state.do
--Merge ITA.dta and state_clean.dta to get union membership estimates for workplaces

****************/

/*When this script is executed, Stata's working directory should be set to the Project/ folder*/

*Load the ITA data (clear replaces whatever dataset is currently in memory).
*Paths use forward slashes: Stata accepts them on Windows, macOS, and Unix,
*whereas backslash separators only resolve on Windows.

use "Data/IntermediateData/ITA.dta", clear

*Merge in the state-level data from state_clean.dta using m:1.
*Many ITA rows can map to one state_clean row, which is uniquely identified
*by state, sector, and year.

local key "state sector year"
local keepvars "empl member covered pctmem pctcov pctmem100 pctcov100 construction manufacturing private public"

*keepusing() limits which variables are brought in from the using file;
*keep(3) retains only observations matched in both files.
merge m:1 `key' using "Data/IntermediateData/state_clean.dta", keepusing(`keepvars') keep(3)

*Drop redundant variables and variables not relevant to the analysis.
*The list is built up in several steps for readability, and each variable is
*named exactly once (no_injuries_illnesses was previously listed twice).

local drop_vars "id naics_code naics_code2 establishment_type industry_description created_timestamp years_available"
local drop_vars "`drop_vars' total_deaths total_dafw_cases total_djtr_cases total_other_cases total_djtr_days"
local drop_vars "`drop_vars' total_poisonings total_skin_disorders total_hearing_loss total_other_illnesses no_injuries_illnesses"
*_merge is the match indicator left behind by the merge step
local drop_vars "`drop_vars' _merge pctmem pctcov"

drop `drop_vars'

*There are no public sector Ohio workplaces in the dataset, so there is no adequate control group for KY public workplaces -- remove these 15 workplaces from the dataset

drop if public == 1
*the public indicator is not used after this filter, so remove it as well
drop public

describe // 17,276 observations remaining

*Log-transform the skewed variables (skew was identified from histograms).
*Each variable is shifted up by 1 before the log is taken, so a value of 0
*maps to log(1) = 0 instead of missing.

rename total_respiratory_conditions total_resp_cond

foreach v in ann_empl total_hours_worked total_dafw_days total_injuries total_resp_cond {
	*store the shifted copy in a scratch variable, log it, then discard the
	*scratch variable right away so it never reaches the saved dataset
	tempvar shifted
	gen `shifted' = `v' + 1
	gen log_`v' = log(`shifted')
	drop `shifted'
}

*Injuries per worker: scales the injury count by the size of the establishment.
*Stata returns missing when the divisor ann_empl is 0.

gen inj_per_worker = total_injuries / ann_empl
gen log_ipw = log(inj_per_worker + 1)

*Create interaction terms for the regressions.
*For each covariate X in the list, three variables are generated:
*   X_KY       =  X x state_dummy
*   X_post     =  X x time_dummy_2017
*   X_KY_post  =  X x state_dummy x time_dummy_2017

local int_list "construction manufacturing private total_hours_worked ann_empl log_total_hours_worked log_ann_empl"
foreach var of local int_list{
	gen `var'_KY = `var'*state_dummy
	gen `var'_post = `var'*time_dummy_2017
	*The triple interaction is built from the base variable. Multiplying the
	*two two-way terms together would yield X squared x KY x post, which only
	*equals the intended term when X is a 0/1 indicator and is wrong for the
	*continuous covariates (hours worked, employment, and their logs).
	*NOTE(review): this changes the values of the continuous triple
	*interactions, so any regressions using them should be re-run.
	gen `var'_KY_post = `var'*state_dummy*time_dummy_2017
}

*Attach descriptive labels to the analysis variables.

*treatment indicators
label variable state_dummy "KY"
label variable time_dummy_2017 "Post-treatment"
label variable state_time "KY, Post-treatment"

*sector indicators
label variable construction "Construction"
label variable manufacturing "Manufacturing"
label variable private "Private"

*establishment size and outcome variables in levels
label variable ann_empl "Average employees"
label variable total_hours_worked "Total hours worked"
label variable total_dafw_days "Total days away"
label variable total_injuries "Total injuries"
label variable total_resp_cond "Total respiratory conditions"
label variable inj_per_worker "Injuries per worker"

*log-transformed variables
label variable log_ann_empl "Log(Average employees)"
label variable log_total_hours_worked "Log(Total hours worked)"
label variable log_total_dafw_days "Log(Days Away)"
label variable log_total_injuries "Log(Injuries)"
label variable log_total_resp_cond "Log(Respiratory conditions)"
label variable log_ipw "Log(IPW)"

*interaction terms
label variable construction_KY "Construction, KY"
label variable construction_post "Construction, Post"
label variable construction_KY_post "Construction, KY, Post"
label variable manufacturing_KY "Manufacturing, KY"
label variable manufacturing_post "Manufacturing, Post"
label variable manufacturing_KY_post "Manufacturing, KY, Post"
label variable private_KY "Private*KY"
label variable private_post "Private*Post"
label variable private_KY_post "Private*KY*Post"
label variable total_hours_worked_KY "Total hours worked, KY"
label variable total_hours_worked_post "Total hours worked, Post"
label variable total_hours_worked_KY_post "Total hours worked, KY, Post"
*NOTE(review): ann_empl is labelled "Average employees" above but its
*interactions say "Annual employees" -- confirm which wording is intended.
label variable ann_empl_KY "Annual employees, KY"
label variable ann_empl_post "Annual employees, Post"
label variable ann_empl_KY_post "Annual employees, KY, Post"
label variable log_ann_empl_KY "Log(Annual employees, KY)"
label variable log_ann_empl_post "Log(Annual employees, Post)"
label variable log_ann_empl_KY_post "Log(Annual employees, KY, Post)"
*the log hours interactions previously had no labels; these follow the
*wording pattern of the log employment interactions
label variable log_total_hours_worked_KY "Log(Total hours worked, KY)"
label variable log_total_hours_worked_post "Log(Total hours worked, Post)"
label variable log_total_hours_worked_KY_post "Log(Total hours worked, KY, Post)"


*Write the analysis dataset, overwriting any existing copy. The path uses
*forward slashes so it resolves on Windows, macOS, and Unix alike.
save "Data/AnalysisData/ITA_state.dta", replace